Dr. Yang Ye <Email:yy@runchee.com>
Nov 23, 2017
library(ggplot), author Hadley Wickham. First release on June 10, 2007.
It’s part of the exploration of the data via visualization.
library(ggplot2)
ggplot(airquality, aes(Temp, Ozone)) +
geom_point() +
geom_smooth(method = "loess", se = FALSE)
## Warning: Removed 37 rows containing non-finite values (stat_smooth).
## Warning: Removed 37 rows containing missing values (geom_point).Definition of data + Definitions of layers
ggplot(data = , …) +
The definition of data will pass down to the layers. But layers can have its own data.
ggplot(data = d1, …) + geom_point() + # this would get data = d1 geom_point(data = d2, …) # this would get data = d2
Put the + sign in the end of the line, not the beginning of the line.
ggplot(bank, aes(age, balance)) + geom_point()ggplot(bank, aes(age, balance, color = job)) + geom_point()If you don’t know the column name, use aes_string to pass variable name as string/character.
ggplot(bank, aes_string("age", "balance", color = "job")) + geom_point()
ggplot(bank, aes(default, age)) + geom_point()ggplot(bank, aes(age, default)) + geom_point()ggplot(bank, aes(job, age)) + geom_point()ggplot(bank, aes(age, balance)) + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'gam'ggplot(bank, aes(age, balance, color = job)) + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'loess'aes downggplot(bank, aes(x = age, y = duration)) +
geom_smooth() +
geom_point()
## `geom_smooth()` using method = 'gam'
# This is equivalent to below
ggplot(bank) +
geom_point(aes(x = age, y = duration)) +
geom_smooth(aes(x = age, y = duration))
## `geom_smooth()` using method = 'gam'
# But we can do specify different data for two geom_smooth()
ggplot(bank) +
geom_point(aes(x = age, y = duration)) +
geom_smooth(data = filter(bank, age > median(age)), aes(x = age, y = duration), color = "green") +
geom_smooth(data = filter(bank, age <= median(age)), aes(x = age, y = duration), color = "red")
## `geom_smooth()` using method = 'gam'
## `geom_smooth()` using method = 'gam'# adjust legent position
ggplot(bank, aes(x = age, y = duration, color = job)) +
geom_point() +
theme(legend.position="bottom")
ggplot(bank, aes(x = age, y = duration, color = job)) +
geom_point() +
theme(legend.position="left")
# Different feeling?
ggplot(bank, aes(x = age, y = duration, color = job)) +
geom_point() +
theme(legend.position="left") +
coord_flip()
# Make y as log scaled.
# Note that before flip, x is y, so we use scale_y_log10()
ggplot(bank, aes(x = age, y = duration, color = job)) +
geom_point() +
theme(legend.position="left") +
coord_flip() +
scale_y_log10()+ is a layer# Nearly empty chart.
g <- ggplot(bank, aes(x = age, y = duration))
g
# This is almost empty
g <- ggplot(bank)
g
# This is really empty.
g <- ggplot()
gg with layersggplot(bank, aes(x = age, y = duration)) +
geom_point() + geom_smooth()
## `geom_smooth()` using method = 'gam'
# This is equivalent to above
g <- ggplot(bank, aes(x = age, y = duration))
g + geom_point() + geom_smooth()
## `geom_smooth()` using method = 'gam'# g can be re-used. It's good to be used when we want to exploratory data.
# Fixed a few variables in `g <- ggplot(data, aes(...))`.
# Use `g + geom_XXX()` to find the best representation for the relationship.
g + geom_point() + geom_smooth(method = "lm") + facet_grid(. ~ job)g + geom_point(color = "steelblue", size = 4, alpha = 1/2)g + geom_point(aes(color = job), size = 4, alpha = 1/2)g + geom_point() + geom_point(aes(color = job), size = 4, alpha = 1/2) color/shape/size/alpha/group to differentiate to different groups.ggplot(bank) +
geom_point(aes(age, duration, shape = contact))
ggplot(bank) +
geom_point(aes(age, duration, color = contact))
ggplot(bank) +
geom_point(aes(age, duration, size = contact))
## Warning: Using size for a discrete variable is not advised.
ggplot(bank) +
geom_point(aes(age, duration, alpha = contact))
ggplot(bank) +
geom_point(aes(age, duration, group = contact))## you can also enforce color, put things outside aes
ggplot(bank) +
geom_point(aes(age, duration), color = "blue", size = 10)Which variables are continuous?
ggplot(bank, aes(age, job)) + geom_point()
# Reverse a categorical variable, we use rev(levels(...)).
# Reverse a continous numerical variable, we use scale_x_reverse().
ggplot(bank, aes(age, job)) +
geom_point() +
scale_y_discrete(limit = rev(levels(bank$job)))## Warning: Using size for a discrete variable is not advised.
ggplot(bank, aes(job, duration)) + geom_boxplot()ggplot(bank, aes(job, age)) + geom_boxplot()ggplot(bank, aes(balance, color = job)) + geom_density()ggplot(bank, aes(duration, fill = job)) + geom_density()ggplot(bank, aes(age, color = job, alpha = 0.3)) + geom_density()# Which is better?
ggplot(bank, aes(age, color = job, fill = job, alpha = 0.3)) + geom_density()ggplot(data = bank, mapping = aes(x = duration, fill = job)) + geom_histogram(binwidth = 2)ggplot(data = bank, mapping = aes(x = duration, fill = job)) + geom_histogram(binwidth = 100)ggplot(data = bank, mapping = aes(x = age, fill = job)) + geom_histogram(binwidth = 10)ggplot(data = bank, mapping = aes(x = age, colour = job)) + geom_freqpoly(binwidth = 10)# first input parameter to geom_bar is mapping, so we can skip it.
ggplot(bank) + geom_bar(mapping = aes(x = age))# in short, we skip mapping
ggplot(bank) + geom_bar(aes(x = age))
# comparing to colour, for Bar, we better use fill
# ggplot(data = bank, ) + geom_bar(aes(x = age, colour = job))
ggplot(bank) + geom_bar(mapping = aes(x = age, fill = job))ggplot(bank) +
geom_bar(mapping = aes(x = job))# Color doesn't work, because age is a continous variable.
ggplot(bank) +
geom_bar(mapping = aes(x = job, fill = age)) ggplot(bank) + geom_bar(mapping = aes(x = age, fill = job))# fill to 100%
ggplot(bank) + geom_bar(mapping = aes(x = age, fill = job), position = "fill")# dodge means "adaptive width of the bar"
ggplot(bank) + geom_bar(mapping = aes(x = age, fill = job), position = "dodge")ggplot(bank) +
geom_bar(mapping = aes(x = age, fill = job), position = "fill") +
coord_flip()ggplot(bank) +
geom_bar(mapping = aes(x = age, fill = job), position = "fill") +
coord_flip() +
scale_x_reverse()ggplot(bank) +
geom_bar(mapping = aes(x = age, fill = job), position = "fill") +
coord_polar()ggplot(data = bank, mapping = aes(x = job, fill = education)) + geom_bar()ggplot(data = bank, mapping = aes(x = job, fill = education)) + geom_bar() + coord_flip()ggplot(data = bank, mapping = aes(x = reorder(job, age, FUN = mean), fill = education)) +
geom_bar() +
coord_flip()ggplot(data = bank, mapping = aes(x = reorder(job, age, FUN = mean), fill = education)) +
geom_bar() +
coord_flip()# If we just to order job according to alphabetical order.
# use rev(levels(...))
ggplot(data = bank, mapping = aes(x = reorder(job, age, FUN = median), fill = education)) +
geom_bar() +
scale_x_discrete(limit = rev(levels(bank$job))) +
coord_flip()# If we want to sort the job acccording to median age
# And also add age range and median age.
ggplot(data = bank, mapping = aes(x = reorder(job, age, FUN = median), fill = education)) +
geom_bar() +
scale_x_discrete(limit = rev(levels(reorder(bank$job, bank$age, FUN = median)))) +
geom_line(aes(x = job, y = age)) +
geom_point(data = group_by(bank, job) %>% summarize(age = median(age)) %>% ungroup, aes(x = job, y = age), inherit.aes = FALSE) +
xlab("Job sorted according to\nMedian age\n(Top - younger)") +
coord_flip()ggplot(data = bank) +
stat_summary(
mapping = aes(x = age, y = balance),
fun.ymin = min,
fun.ymax = max,
fun.y = median
)# If just want to
ggplot(data = bank) +
geom_point(mapping = aes(x = age, y = duration)) +
facet_wrap(~ education, nrow = 2)# doesn't look great because we have so many jobs.
ggplot(bank, aes(pdays)) + geom_histogram() + facet_grid(job ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Not a good choice, neither
ggplot(bank, aes(pdays)) + geom_histogram() + facet_grid(. ~ job)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Can we do better?
ggplot(bank, aes(campaign)) + geom_histogram() + facet_grid(. ~ job)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(bank, aes(duration)) + geom_histogram(aes(color = job)) + facet_grid(. ~ job)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# facet with points is good
ggplot(bank, aes(balance, age)) + geom_point() + facet_grid(. ~ job)# do better
ggplot(bank, aes(balance, age)) + geom_point(aes(color = job)) + facet_grid(. ~ job)
# Can we apply points between age and balance?
ggplot(bank, aes(age, balance, color = job)) + geom_point() + geom_smooth() + facet_grid(. ~ job)
## `geom_smooth()` using method = 'loess'
# Smooth line is mixed with points
ggplot(bank, aes(age, balance)) + geom_point(aes(color = job)) + geom_smooth() + facet_grid(. ~ job)
## `geom_smooth()` using method = 'loess'ggplot(bank, aes(previous)) + geom_histogram() + facet_grid(. ~ job)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.# v.s.
ggplot(bank, aes(previous)) + geom_histogram(aes(fill = job)) + facet_grid(. ~ job)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.# v.s.
ggplot(bank, aes(previous)) + geom_density(aes(fill = job))ggplot(bank, aes(previous)) + geom_histogram() + facet_grid(. ~ marital)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.# v.s.
ggplot(bank, aes(previous)) + geom_density(aes(fill = marital))# v.s.
ggplot(bank, aes(previous)) + geom_density(aes(fill = marital), alpha = 0.7) + xlim(1, 10)
## Warning: Removed 3725 rows containing non-finite values (stat_density).# Levels gives more control to the layer and style.
cutpoints <- quantile(bank$age, seq(0, 1, length = 4), na.rm = TRUE)
# The age_group variable is now a categorical factor variable containing 3 levels, indicating the ranges of age.
bank$age_group <- cut(bank$age, cutpoints)
levels(bank$age_group)
## [1] "(19,35]" "(35,45]" "(45,87]"
# Use facet_wrap to specify nrow/ncol.
ggplot(bank, aes(age, duration)) +
geom_point(alpha = 1/3) +
facet_wrap(job ~ age_group, nrow = 2) + # ncol = number of cuts 3 = length(levels(bank$age_group))
geom_smooth(method="lm", se=FALSE, col="steelblue") +
theme_bw(base_family = "Avenir", base_size = 10) +
labs(x = "age", y = expression("log " * Duration)) +
scale_y_log10() +
labs(title = "Bank Clients")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x
## $y, : font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font databasedefault theme is theme_gray()
g <- ggplot(bank, aes(x = age, y = log10(duration)))
g + geom_point(aes(color = job), size = 4, alpha = 1/2) + theme_bw()g + geom_point(aes(color = job), size = 4, alpha = 1/2) + theme_void()
g + geom_point(aes(color = job), size = 4, alpha = 1/2) + theme_minimal() +
labs(title = "Duration is longer with age",
subtitle = "some random plot",
caption = "from MFE") +
labs(x = "age", y = expression("log " * Duration))ggthemes provides many other themes.library(ggthemes)
## [1] "theme_base" "theme_calc"
## [3] "theme_economist" "theme_economist_white"
## [5] "theme_excel" "theme_few"
## [7] "theme_fivethirtyeight" "theme_foundation"
## [9] "theme_gdocs" "theme_hc"
## [11] "theme_igray" "theme_map"
## [13] "theme_pander" "theme_par"
## [15] "theme_solarized" "theme_solarized_2"
## [17] "theme_solid" "theme_stata"
## [19] "theme_tufte" "theme_wsj"ggplot(data = ) +
render*() functionsrender*() arguments are code used to build and rebuild objectrender*() function re-runs the code with every change in the input
output$hist <- renderPlot({
hist(data())
})
output$stat <- renderPlot({
summary(data())
})
Use of isolate to peek the value not to react to its change every time.
actionButton(inputId = “go”, label = “Click me”)
observeEvent(input\(go, { # Use of isolate to *peek* the value not to react to it. num_input <- isolate(input\)num_input)
output\(plot1 <- renderPlot({ # if we use input\)num_input here, we build a direct reactive link # between output\(plot1 and input\)num_input. This is not what we designed. plot(1:number_input, runif(num_input)) })
output$table1 <- renderTable({ … }) })
renderUI# shiny-34-renderUI.R
library(shiny)
ui <- fluidPage(
uiOutput("p1")
)
server <- function(input, output, session) {
output$p1 <- renderUI({
tagList(
h1("HTML t1"),
uiOutput("t1"),
h1("Plot p1"),
plotOutput("p1")
)
})
}
shinyApp(ui, server)
You can use newly created UI immeidately
# shiny-34-renderUI.R
library(shiny)
ui <- fluidPage(
uiOutput("p1")
)
server <- function(input, output, session) {
output$p1 <- renderUI({
tl <- tagList(
h1("HTML t1"),
uiOutput("t1"),
h1("Plot p1p1"),
plotOutput("p1p1")
)
tl
})
output$t1 <- renderUI({
tagList(
h1("HTML p1t1 inside t1"),
plotOutput("p1t1")
)
})
output$p1t1 <- renderPlot({
# hist(runif(10000))
plot(1:100, runif(100))
})
output$p1p1 <- renderPlot({
plot(1:100, runif(100))
})
}
shinyApp(ui, server)
library(shiny)
library(knitr)
library(kableExtra)
ui <- fluidPage(
numericInput("num", "Num", 3),
uiOutput("p1"),
hr(),
tableOutput("p2")
)
server <- function(input, output, session) {
observe({
row_num <- input$num
output$p1 <- renderUI({
tagList(
tags$h1("This is a header"),
{
if (row_num > 0 & row_num < 7) {
hx <- paste0("h", row_num)
(tags[[hx]])(toupper(hx))
} else {
(tags[["h6"]])(toupper("h6"))
}
},
numericInput("num_plot", "Give a number", value = round(runif(1, min = 0, max = nrow(iris)), 0), min = 0, max = nrow(iris)),
plotOutput("plot"),
tags$h3("kable can't be used with tagList."),
kable(iris[1:row_num, , drop = T], format = "html")
)
})
# num_plot is the newly created input.
# plot is the newly created output.
# You can use the newly created input/output immediately
# This is particularly useful for creating multiple plots and tables.
output$plot <- renderPlot({
if (input$num_plot > 0) {
ggplot(iris[1:input$num_plot, , drop = F], aes(x = Sepal.Length, y = Petal.Width)) +
geom_point() +
geom_smooth() +
theme_minimal()
}
})
# Use anything together with kable, use function() { paste0(...) }
output$p2 <- function() {
paste0(
tags$h1("kable is used inside a function()"),
kable(iris[1:row_num, , drop = T], format = "html"))
}
})
}
shinyApp(ui, server)
uiOutput(“h1”) output$h1 <- renderUI({ tagList( sliderInput(“n”, “N”, 1, 1000, 500), textInput(“label”, “Label”) ) })
updateSelectionInput(…) updateNumericInput(…)
update***Inputlibrary(shiny)
ui <- fluidPage(
uiOutput("p1"),
verbatimTextOutput("o1")
)
scenarios <- c(-100, -50, 0, 50, 100)
server <- function(input, output, session) {
output$p1 <- renderUI({
tagList(
numericInput("shock", "Shock", value = round(runif(1) * 1000), 0),
actionButton("add", "Add"),
checkboxGroupInput("scenarios", "Scenarios", choices = c(), selected = c())
)
})
updateCheckboxGroupInput(session, "scenarios",
choices = scenarios,
selected = scenarios)
observeEvent(input$add, {
shock <- isolate(input$shock)
if (!(shock %in% scenarios)) {
scenarios <<- sort(c(scenarios, shock))
updateCheckboxGroupInput(session, "scenarios",
choices = scenarios,
selected = scenarios)
}
updateNumericInput(session, "shock", value = round(runif(1) * 1000))
})
output$o1 <- renderPrint({
x <- input$scenarios
str(x)
cat(paste0("length: ", length(x), "\n"))
cat(paste0(x, "\n"))
})
}
shinyApp(ui, server)
If we need to generate multiple plots. ggplot has a companion package to arrange plots.
SxS: side by side
library(gridExtra)
p1 <- ggplot(bank) + geom_bar(mapping = aes(x = age, fill = job), position = "fill") + coord_polar()
p2 <- ggplot(bank) + geom_bar(mapping = aes(x = age, fill = education), position = "fill") + coord_polar()
grid.arrange(p1, p2, ncol=2, nrow=1)grid.arrange(p1, p2, ncol=2, nrow=1, widths = c(4,2))grid.arrange(p1, p2, ncol=1, nrow=2, heights = c(4,2))a bit more complicated
library(tibble)
library(ggplot2)
library(gridExtra)
df <- tibble(x = rnorm(1000), y = rnorm(1000))
hist_top <- ggplot(df, aes(x = x)) + geom_density()
empty <-
ggplot()+geom_point(aes(1,1), colour="white")+
theme(axis.ticks=element_blank(),
panel.background=element_blank(),
axis.text.x=element_blank(), axis.text.y=element_blank(),
axis.title.x=element_blank(), axis.title.y=element_blank())
scatter <- ggplot(df, aes(x = x, y = y)) + geom_point()
hist_right <- ggplot(df, aes(x = y)) + geom_density() + coord_flip()
grid.arrange(hist_top, empty, scatter, hist_right, ncol=2, nrow=2, widths=c(3.5, 0.7), heights=c(1, 4))kable is provided by knitr package. kableExtra enhance it with more functions. So we load both packages.
```{r shiny_block}
library(knitr)
library(kableExtra)
# This is HTML output
kable(df, format = "html")
# Use function() { } to output html
output$p1 <- function() {
kable(df, format = "html")
}
```
Get all styles from here https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
style
mtcars[1:10, , drop = F] %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
font_size = 12,
full_width = F, # True for left-to-right width
position = "left") # if full_width == F| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
| Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
| Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
| Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
| Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
mtcars[1:10, , drop = F] %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
font_size = 12,
full_width = F, # True for left-to-right width
position = "left") %>% # if full_width == F
column_spec(1, bold = T, border_right = T) %>%
column_spec(2, width = "30em", background = "yellow")| mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
| Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
| Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
| Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
| Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
mtcars[1:10, , drop = F] %>%
kable("html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
font_size = 12,
full_width = F, # True for left-to-right width
position = "left") %>% # if full_width == F
column_spec(5:7, bold = T) %>%
row_spec(3:5, bold = T, color = "white", background = "#D7261E") | mpg | cyl | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Mazda RX4 | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.620 | 16.46 | 0 | 1 | 4 | 4 |
| Mazda RX4 Wag | 21.0 | 6 | 160.0 | 110 | 3.90 | 2.875 | 17.02 | 0 | 1 | 4 | 4 |
| Datsun 710 | 22.8 | 4 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
| Hornet 4 Drive | 21.4 | 6 | 258.0 | 110 | 3.08 | 3.215 | 19.44 | 1 | 0 | 3 | 1 |
| Hornet Sportabout | 18.7 | 8 | 360.0 | 175 | 3.15 | 3.440 | 17.02 | 0 | 0 | 3 | 2 |
| Valiant | 18.1 | 6 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
| Duster 360 | 14.3 | 8 | 360.0 | 245 | 3.21 | 3.570 | 15.84 | 0 | 0 | 3 | 4 |
| Merc 240D | 24.4 | 4 | 146.7 | 62 | 3.69 | 3.190 | 20.00 | 1 | 0 | 4 | 2 |
| Merc 230 | 22.8 | 4 | 140.8 | 95 | 3.92 | 3.150 | 22.90 | 1 | 0 | 4 | 2 |
| Merc 280 | 19.2 | 6 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
vol_surface <- tibble(tenor = c("1M", "2M", "3M", "6M"),
`0.1` = c(0.472, 0.435, 0.391, 0.29),
`0.25` = c(0.431, 0.41, 0.337, 0.28),
`0.5` = c(0.398, 0.30, 0.251, 0.2),
`0.75` = c(0.428, 0.336, 0.307, 0.249),
`0.9` = c(0.457, 0.411, 0.391, 0.278))
# "magma" (or "A"), "inferno" (or "B"), "plasma" (or "C"), and "viridis" (or "D", the default option).
library(knitr)
library(kableExtra)
gather(vol_surface, key = "delta", value = "vol", -tenor) %>%
# cell_spec takes column vol. spec_color also takes column vol values into consideration.
# We take half of the spectrurm - yellow to red.
mutate(vol = cell_spec(
vol, "html", color = "black", bold = T,
background = spec_color(vol, begin = 0.5, end = 1, option = "A", direction = -1))) %>%
spread(key = "delta", value = "vol") %>%
kable("html", escape = F, align = "c") %>%
kable_styling("striped", full_width = F)| tenor | 0.1 | 0.25 | 0.5 | 0.75 | 0.9 |
|---|---|---|---|---|---|
| 1M | 0.472 | 0.431 | 0.398 | 0.428 | 0.457 |
| 2M | 0.435 | 0.41 | 0.3 | 0.336 | 0.411 |
| 3M | 0.391 | 0.337 | 0.251 | 0.307 | 0.391 |
| 6M | 0.29 | 0.28 | 0.2 | 0.249 | 0.278 |
mtcars[1:10, 1:2] %>%
mutate(
car = row.names(.),
mpg = cell_spec(mpg, "html", color = ifelse(mpg > 20, "red", "blue")),
cyl = cell_spec(cyl, "html", color = "white", align = "c", angle = 45,
background = factor(cyl, c(4, 6, 8),
c("#666666", "#999999", "#BBBBBB")))
) %>%
select(car, mpg, cyl) %>%
kable("html", escape = F) %>%
kable_styling("striped", full_width = F)
volatility surface.
Include all cells for colors, using gather, cell_spec, then spread
iris[1:10, ] %>% mutate_if(is.numeric, function(x) { cell_spec(x, “html”, bold = T, color = spec_color(x, end = 0.9), font_size = spec_font_size(x)) }) %>% mutate(Species = )) %>% kable(“html”, escape = F, align = “c”) %>% kable_styling(“striped”, full_width = F)
models <- mtcars %>% split(.$cyl) %>% map(function(df) lm(mpg ~ wt, data = df))
models <- mtcars %>% split(.$cyl) %>% map(~lm(mpg ~ wt, data = .))
library(modelr)
mod <- lm(log(balance) ~ log(age), data = bank)
bank1 <- filter(bank, default == “no” & balance > 0) mod <- lm(log(balance) ~ log(age), data = bank1)
bank2 <- bank1 %>% add_residuals(mod) %>% mutate(resid = exp(resid))
ggplot(data = bank2) + geom_point(mapping = aes(x = age, y = resid))
First, you define a family of models that express a precise, but generic, pattern that you want to capture. For example, the pattern might be a straight line, or a quadatric curve. You will express the model family as an equation like y = a_1 * x + a_2 or y = a_1 * x ^ a_2. Here, x and y are known variables from your data, and a_1 and a_2 are parameters that can vary to capture different patterns.
Next, you generate a fitted model by finding the model from the family that is the closest to your data. This takes the generic model family and makes it specific, like y = 3 * x + 7 or y = 9 * x ^ 2.
install.package(“purrr”, “modelr”) library(purrr) library(modelr)
sim1
model1 <- function(a, data) { a[1] + data$x * a[2] }
measure_distance <- function(mod, data) { diff <- data\(y - model1(mod, data) sqrt(mean(diff ^ 2)) } best <- optim(c(0, 0), measure_distance, data = sim1) best\)par
sim1_mod <- lm(y ~ x, data = sim1) coef(sim1_mod)
sim1 %>% data_grid(x) %>% # generate data set. add_predictions(sim1_mod)
ggplot(sim1, aes(x)) + geom_point(aes(y = y)) + geom_line(aes(y = pred), data = grid, colour = “red”, size = 1)
add_residuals(sim1_mod, sim1)
ggplot(sim1, aes(resid)) + geom_freqpoly(binwidth = 0.5)
ggplot(bank) + geom_bar(aes(x = age, fill = y))
model_matrix(bank, y ~ age)
bank_mod <- lm(y ~ age, data = mutate(bank, y = ifelse(y == “yes”, 1, 0)))
mutate(bank, y = ifelse(y == “yes”, 1, 0)) %>% data_grid(age) %>% add_predictions(bank_mod) mutate(bank, y = ifelse(y == “yes”, 1, 0)) %>% data_grid(age) %>% add_predictions(bank_mod) %>% ggplot(aes(x = age, y = pred)) + geom_point()
mutate(bank, y = ifelse(y == “yes”, 1, 0)) %>% add_residuals(bank_mod) %>% ggplot(aes(resid)) + geom_freqpoly(binwidth = 0.05)
bank_mod <- lm(y ~ age * job, data = mutate(bank, y = ifelse(y == “yes”, 1, 0))) mutate(bank, y = ifelse(y == “yes”, 1, 0)) %>% data_grid(age, job) %>% add_predictions(bank_mod) %>% ggplot(aes(x = age, colour = job)) + geom_line(aes(y = pred))
mutate(bank, y = ifelse(y == “yes”, 1, 0)) %>% add_residuals(bank_mod) %>% ggplot(aes(x = age, resid, colour = job)) + geom_point()
model_matrix(df, y ~ I(x^2) + x)
with different jobs
by_job <- group_by(bank, job) %>% nest() job_balance <- function(df) { lm(balance ~ age, data = df) } models <- mutate(by_job, model = purrr::map(data, job_balance))
by_job_res <- models %>% mutate(resids = map2(data, model, add_residuals))
resids <- unnest(by_job_res, resids)
resids %>% ggplot(aes(age, resid)) + geom_line(aes(colour = job)) + geom_smooth(se = FALSE)
resids %>% ggplot(aes(age, resid, group = job)) + geom_line(alpha = 1 / 3) + facet_wrap(~job)
mod1 <- lm(y ~ ns(x, 1), data = sim5)
mod1 <- lm(y ~ x1 + x2, data = sim3) mod2 <- lm(y ~ x1 * x2, data = sim3)
library(ggplot2)
set.seed(123)
N <- 1000
x <- rnorm(N)
f <- function(x) 50*x^2/(1 + 4*x) # data-simulating function
y <- f(x) + rnorm(N, sd=3)
point_data <- data.frame(x, y)
library(tidyverse)
ggplot(point_data, aes(x=x, y=y)) +
geom_point() +
ylim(-100, 100) +
ggtitle("simulated data points")
## Warning: Removed 3 rows containing missing values (geom_point).
fit_pade <- function(point_data){
fit <- lm(y ~ x + I(x^2) + I(y*x) + I(y*x^2), point_data)
lm_coef <- as.list(coef(fit))
names(lm_coef) <- c("a0", paste0(rep(c('a','b'), each=2), 1:2))
with(lm_coef, function(x)(a0 + a1*x + a2*x^2)/(1 - b1*x - b2*x^2))
}
plot_fitted_function <- function(x_data, fitted_fun, title){
x_data$y_hat <- fitted_fun(x_data$x)
g <- ggplot(x_data, aes(x=x, y=y)) +
geom_point() + ylim(-100, 100) +
geom_line(aes(y=y_hat), col="red", size=1) +
ggtitle(title)
plot(g)
}
pade_approx <- fit_pade(point_data)
plot_fitted_function(point_data, pade_approx, title="fitted function")
## Warning: Removed 3 rows containing missing values (geom_point).
function_list <- list(
function(x) (100 - 50*x - 100*x^2)/(1 - 50*x - 5*x^2),
function(x) (100 - 50*x - 100*x^2)/(1 - 10*x - 5*x^2),
function(x) (100 - 50*x - 100*x^2)/(1 - 10*x - 10*x^2)
)
for (f in function_list){
sim_data <- point_data %>% mutate(y=f(x) +
rnorm(nrow(point_data), sd=3))
plot_fitted_function(sim_data, fit_pade(sim_data),
title=as.character(deparse(f))[2])
}
## Warning: Removed 14 rows containing missing values (geom_point).## Warning: Removed 96 rows containing missing values (geom_point).
## Warning: Removed 86 rows containing missing values (geom_point).
dev.off() data.model <- select(bank, balance, education) %>% mutate(education = factor(education)) plot(data.model) combination <- model.matrix(~ . - 1, data.model) data.model\(cl <- kmeans(combination, 4)\)cluster with(data.model, plot(balance, education, col = cl))
dev.off() data.model <- select(bank, job, education) %>% mutate(job = factor(job), education = factor(education)) plot(data.model) combination <- model.matrix(~ . - 1, data.model) data.model\(cl <- kmeans(combination, 2)\)cluster with(data.model, plot(job, education, col = cl))
dev.off() data.model <- select(bank, balance, duration) plot(data.model) data.model\(cl <- kmeans(data.model[, 1:2], 4)\)cluster with(data.model, plot(balance, duration, col = cl)) with(data.model, text(balance, duration, col=cl))
library(xts) xts object => store prices and retrieve.
library(fOption)
Option valuation.
How to value a portfolio of stocks?
How to value a portfolio of options?
How to simulate portfolio gain.
How to do SVD analysis?
sample option portfolio sample stock portfolio